---
title: "Reproduction script for 'Power and false negatives in QCA', by Ingo Rohlfing"
output:
  html_document: default
  pdf_document: default
---

This script reproduces all figures, tables and quantities in the manuscript *without* running the simulations. The simulation results are loaded as data into RStudio (see readme-file for more information). If a code chunk starts with loading data, then this is data that were generated with the code in the previous chunk. (The chunk is therefore set to `eval = F` in Markdown.)

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

Packages required for reproduction
```{r library, message = F}
library(dplyr)
library(ggplot2)
library(ggforce)
library(tidyr)
library(combinat)
```

### Reproduction information
This script has been last executed on `r Sys.Date()` using:  

* OS: Windows10 Home (version 1607, 64 bit)
* `r R.version.string`  
* RStudio: 1.0.136
* `combinat`: `r packageVersion("combinat")`  
* `dplyr`: `r packageVersion("dplyr")`  
* `ggplot2`: `r packageVersion("ggplot2")`  
* `ggforce`: `r packageVersion("ggforce")`  
* `tidyr`: `r packageVersion("tidyr")`  

### Reproduction code
A function `qcapower()` for the simulation of power for one set of parameters.
```{r qcapower function, eval = F}
qcapower <- function(cases, null_hypo, alt_hypo, 
                     sims = 1000, perms = 10000,
                     alpha = 0.05, cons_threshold = 0.01, 
                     set_seed = 135) {
  null_sig <- vector("numeric", length = sims)
  null_p <- vector("numeric", length = sims)
  quant <- vector("numeric", length = sims)

  set.seed(set_seed)
  calc_consistency <- function(a, y) sum(pmin(a, y)) / sum(a)
  random_draw <- function() round(runif(cases), digits = 2)
  df <- data.frame(index=1:cases)

  for(i in 1:sims) {
    df$a <- random_draw()  
    df$y <- random_draw()
    cons <- calc_consistency(df$a, df$y)
    while(abs(alt_hypo - cons) > cons_threshold) {
      if(cons < alt_hypo) {
        sample_row <- sample(df[df$a > df$y, 'index'], 1)
        df[sample_row, 'y'] <- runif(1, min = df[sample_row, 'y'], max = 1)
      } else if(cons > alt_hypo) {
        sample_row <- sample(df[ , 'index'], 1)
        df[sample_row, 'y'] <- runif(1, min = 0, max = df[sample_row, 'y'])
      }
      cons <- calc_consistency(df$a, df$y)
    }
    cons_perms <- sapply(1:perms, function(x) calc_consistency(df$a, sample(df$y))) 
    quant[i] <- quantile(cons_perms, 0.05)
    null_p[i] <- ecdf(cons_perms)(null_hypo) 
    null_sig[i] <- ifelse(null_p[i] + 1.96 * null_p[i] / sqrt(perms) < alpha, 1, 0) 
  }
  power <- sum(null_sig) / sims  
  powercum <- cumsum(null_sig) / 1:sims  
  estim <- data.frame(power, powercum, alt_hypo, null_hypo, cases, quant)

  return(estim)
}
```

A function `calc_qcapower()` drawing on `qcapower()` for estimating power for all combinations of values of the alternative hypothesis, the null hypothesis (at least 0.05 consistency points lower than the alternative hypothesis) and number of cases (10 to 50 in steps of 10).
```{r calc_qcapower function, eval = F}
calc_qcapower <- function(alt_max, alt_min, alt_steps = 0.05, 
                          null_diff = 0.05, cases_seq = seq(10, 50, by=10)) {
  params <- list(set_seed = 111)
  
  cons_params <- rev(seq(from = alt_min, to = alt_max, by = alt_steps))
  null_params <- cons_params - null_diff
  n_params <- seq(from = 10, by = 10, length.out = length(cons_params))
  
  sim_params <- expand.grid(alt_hypo = cons_params, null_hypo = null_params, 
                            cases = n_params)
  sim_params <- sim_params[sim_params$alt_hypo > (sim_params$null_hypo + 1e-10), ]  
  sim_params_list <- apply(sim_params, 1, as.list)
  
  powerest_list <- lapply(sim_params_list, function(x) do.call(qcapower, 
                                                               modifyList(params, x)))
  powerest_all <- do.call(rbind, powerest_list)
  
  return(powerest_all)
}
```

Simulating power for all pre-specified combinations of cases, null hypotheses and alternative hypotheses. The number of simulations is 1000; the number of permutations per simulation is 10000; the range of values of the alternative hypothesis is 1 to 0.8 in steps of 0.05.
```{r simpower, eval = F}
s1k_p10k <- calc_qcapower(1, 0.8)
```

Loading the stored results of the previous chunk. Summarizing the simulation results in five panels (figure 2 in manuscript). The second command reorders the values of c(H1) such that they are descending.
```{r simresults, cache=T, fig.heigt=8}
load("s1k_p10k.RData") 
s1k_p10k <- s1k_p10k %>% 
  mutate(h1 = factor(alt_hypo, ordered = T)) %>% 
  mutate(h1 = factor(h1, levels = rev(levels(h1))))

ggplot(s1k_p10k, aes(null_hypo, power, group = cases)) +
  geom_point(aes(colour = cases)) + geom_line(aes(colour = cases), size = 0.5) +
  labs(x = expression('c'[H0]), y = "power estimate") +
  xlim(c(0.75, 0.95)) + ylim(c(0, 1)) + theme_bw() +
  facet_wrap(~h1, ncol = 2) + guides(color = guide_legend(order=1)) +
  theme(legend.position = c(0.8, 0.12)) + 
  ggtitle(expression('Figure 2: Power estimates for five values of c'[H1]*' and c'[H0])) 
```

Descriptive analysis of the 75 power estimates in a bar chart (figure 3 in manuscript).
```{r powerdesc}
qp.comb <- s1k_p10k[, c("power", "alt_hypo", "null_hypo", "cases")]
qp.collapse <- qp.comb %>% 
               group_by(alt_hypo, null_hypo, cases) %>%
               summarise_each(funs(mean(power)))

qp.collapse$powercat <- cut(qp.collapse$power, seq(0, 1, 0.1),
                            include.lowest = T, 
                            labels = c("0-.09", ".1-.19", ".2-.29", 
                                       ".3-.39", ".4-.49", ".5-.59", 
                                       ".6-.69", ".7-.79", ".8-.89", ".9-1"))
ggplot(qp.collapse, aes(factor(powercat))) + geom_bar() + theme_bw() + 
       labs(x = "intervals of power", y = "frequency") + 
  ggtitle("Figure 3: Summary of power estimates") 
```

The number of power estimates per category is (same information as in the bar chart):
```{r counts.cat, echo = -2}
counts.cat <- table(qp.collapse$powercat)
print(counts.cat)
```

The cumulative sum of power estimates from the lowest to the highest category as summarized in table 3.
```{r counts.sum, echo = -2}
counts.sum <- cumsum(counts.cat)
print(counts.sum)
```

A plot illustrating that 1000 simulations produce a reliable power estimate (figure 4 in manuscript).
```{r powercumul}
line1 <- filter(s1k_p10k, null_hypo == 0.75 & alt_hypo == 1 & cases == 10) %>%  
  mutate(id = c(seq(1:nrow(.))))
line2 <- filter(s1k_p10k, null_hypo == 0.75 & alt_hypo == 0.8 & cases == 50) %>% 
  mutate(id = c(seq(1:nrow(.))))
powerrun <- bind_rows(line1, line2) %>% 
  mutate(cases_chr = ifelse(cases == 10, "n=10, H1=1, H0=0.75", 
                            "n=50, H1=0.8, H0=0.75"))

ggplot(powerrun, aes(id, powercum, color = cases_chr)) + theme_bw() +
  geom_line() + ylim(c(0, 1)) + 
  labs(x = "simulation index", y = "running power estimate") +
  theme(legend.title = element_blank(),
        legend.position = "bottom") +
  scale_color_discrete(labels = c(expression('n=50, c'[H1]*'=1, c'[H0]*'=0.75'),
                     expression('n=10, c'[H1]*'=0.8, c'[H0]*'=0.75'))) +
  ggtitle("Figure 4: Running power estimate for two paramater constellations") 
```

The illustration of why power is estimated to be higher for fewer cases if the difference between c(H1) and c(H0) is small requires adapting the original `qcapower()` function (c(H1) and c(H0) are the consistency values of H1 and H0). The output of `qcapower_exp()` expands the original function and outputs the consistency value of each permuted distribution (parameter `cons_perms`).
```{r qcapower_exp function, eval = F}
qcapower_exp <- function(cases, null_hypo, alt_hypo, sims = 100, perms = 100,
                     alpha = 0.05, cons_threshold = 0.01, set_seed = 135) {
  null_sig <- vector("numeric", length = sims)
  null_p <- vector("numeric", length = sims)

  set.seed(set_seed)
  calc_consistency <- function(a, y) sum(pmin(a, y)) / sum(a)
  random_draw <- function() round(runif(cases), digits = 2)
  df <- data.frame(index=1:cases)

  for(i in 1:sims) {
    df$a <- random_draw()  
    df$y <- random_draw()
    cons <- calc_consistency(df$a, df$y)
    while(abs(alt_hypo - cons) > cons_threshold) {
      if(cons < alt_hypo) {
        sample_row <- sample(df[df$a > df$y, 'index'], 1)
        df[sample_row, 'y'] <- runif(1, min = df[sample_row, 'y'], max = 1)
      } else if(cons > alt_hypo) {
        sample_row <- sample(df[ , 'index'], 1)
        df[sample_row, 'y'] <- runif(1, min = 0, max = df[sample_row, 'y'])
      }
      cons <- calc_consistency(df$a, df$y)
    }
    cons_perms <- sapply(1:perms, function(x) calc_consistency(df$a, sample(df$y))) 
    null_p[i] <- ecdf(cons_perms)(null_hypo) # p-value of null-hypo
    null_sig[i] <- ifelse(null_p[i] + 1.96 * null_p[i] / sqrt(perms) < alpha, 1, 0) 
  }
  power <- sum(null_sig) / sims  
  powercum <- cumsum(null_sig) / 1:sims  
  estim <- data.frame(powercum, null_p, power, alt_hypo, null_hypo, cases, cons_perms)

  return(estim)
}
```

For illustrative purposes, it suffices to run 10000 permutations ten times for n=10 and n=50. `sims` is set to 1 in the `qcapower_exp()` function, but the function is applied to ten different numbers for `set_id`. This is equivalent to setting `set_seed` to one number and performing ten simulations.
```{r p10k_n10_n50, eval = F}
p10k_n10 <- data.frame(set_id = rep(1:10, 10000)) %>%
  arrange(set_id)
p10k_n10 <- p10k_n10 %>% 
  mutate(perms_n = 10000) %>%
  group_by(set_id) %>%
  mutate(consist = qcapower_exp(cases = 10, null_hypo = 0.8, 
                                         alt_hypo = 1, sims = 1, set_seed = set_id,
                                         perms = 10000)$cons_perms) %>%
  mutate(quantile05 = quantile(consist, 0.05)) %>%
  mutate(cases = "n=10") %>%
  ungroup(set_id)

p10k_n50 <- data.frame(set_id = rep(1:10, 10000)) %>%
  arrange(set_id)
p10k_n50 <- p10k_n50 %>% 
  mutate(perms_n = 10000) %>%
  group_by(set_id) %>%
  mutate(consist = qcapower_exp(cases = 50, null_hypo = 0.8, 
                                         alt_hypo = 1, sims = 1, set_seed = set_id,
                                         perms = 10000)$cons_perms) %>%
  mutate(quantile05 = quantile(consist, 0.05)) %>%
  mutate(cases = "n=50") %>%
  ungroup(set_id)
```

Loading the stored results of the previous chunk for a comparison of the permuted distributions for n =10 and n=50 (figure 5 in manuscript).
```{r perms10k}
load("p10k.RData")
p10k_n50_n10 <- bind_rows(p10k_n10, p10k_n50)
ggplot(p10k_n50_n10, aes(x = consist)) +
  geom_density(aes(group = set_id), color = "gray") +
  geom_rug(aes(quantile05, group = set_id)) + theme_bw() +
  xlab("consistency value of permutation") + 
  facet_wrap(~cases, ncol = 1) +
  ggtitle("Figure 5: Consistency value distributions and 5%-quantiles for ten simulations") +
  theme(plot.title = element_text(hjust = 0.5))
```

A sina plot (figure 6 in manuscript) gives a comprehensive perspective on the permuted distributions by plotting their left-sided 5%-quantiles for all simulations summarized in figure 2. 
```{r sina plot, dependson=6, fig.height=10}
ggplot(s1k_p10k, aes(factor(cases), quant)) + 
  geom_sina(aes(color = cases), size = 0.1) + 
  geom_hline(yintercept = c(0.8, 0.95), linetype = 5, color = "gray") +
  labs(y = "5% quantile", x = "") + theme_bw() + guides(fill = F) +
  ylim(c(0, 1)) +
  scale_x_discrete(labels = c("n=10", "n=20", "n=30", "n=40", "n=50")) +
  theme(legend.position = "") + facet_wrap(~h1, ncol = 1) +
  ggtitle(expression('Figure 6: Distribution of 5%-quantiles for five values of c'[H1])) 
```

### *Ex post* power estimation
Loading paramaters of empirical QCA studies (consistency threshold for assigning outcome values to truth table rows and number of cases) .
```{r empdesc}
qcapar <- read.csv(file = "qcapar_total.csv", sep = ";", header = T)
```

The number of articles across which the 63 truth table analyses are distributed.
```{r no of studies}
length(unique(substr(as.character(qcapar$author), 1, 
                     nchar(as.character(qcapar$author))-1)))
```

The number of articles per assumed value of the alternative hypthesis is:
```{r no of studies per H1}
qcapar_n <- function(x) { 
  length(which(qcapar$null <= x))
}
study.n <- sapply(c(1, 0.949, 0.899, 0.849, 0.799), qcapar_n)
H1 <- c(1, 0.95, 0.9, 0.85, 0.8)
print(rbind(H1, study.n))
```

A scatterplot of the number of cases and values of the null hypothesis used for preparing the truth table analysis (figure 7 in manuscript).
```{r scatterplot parameters}
ggplot(qcapar, aes(cases, null)) + geom_point() + theme_bw() +
  labs(x = "cases", y = "consistency threshold") + ylim(0.7, 1) +
  ggtitle("Figure 7: Parameters of empirical QCA studies")
summary(qcapar$cases)
summary(qcapar$null)
```

Function `qcapower_short()` for *ex post* power analysis of empirical studies. This function only outputs the power value for each simulation. The functions `get_sims_params()` and `cons_values()`  make sure that power estimates are produced for all possible values of c(H1) if the difference between c(H1) and c(H0) is equal to or larger than 0.05.
```{r ex post simulation, eval = F}
qcapower_short <- function(cases, null, cons_target,
                     cons_threshold = 0.01, alpha = 0.05, sims = 1000, perms = 10000,
                     complete_results = FALSE, set_seed = 135) {
  nullsig <- vector("numeric", length = sims) 
  nullp <- vector("numeric", length = sims) 

  set.seed(set_seed)
  calc_consistency <- function(a, y) sum(pmin(a, y)) / sum(a)
  df <- data.frame(index=1:cases)

  for(i in 1:sims) {
    df$a <- round(runif(cases, min = 0, max = 1), digits = 2) 
    df$y <- round(runif(cases, min = 0, max = 1), digits = 2) 
    cons <- calc_consistency(df$a, df$y)
    while(abs(cons_target - cons) > cons_threshold) {
      if(cons < cons_target) {
        sample_row <- sample(df[df$a > df$y, 'index'], 1)
        df[sample_row, 'y'] <- runif(1, min = df[sample_row, 'y'], max = 1)
      } else if(cons > cons_target) {
        sample_row <- sample(df$index, 1)
        df[sample_row, 'y'] <- runif(1, min = 0, max = df[sample_row, 'y'])
      }
      cons <- calc_consistency(df$a, df$y)
    }
    cons_perms <- sapply(1:perms, function(x) calc_consistency(df$a, sample(df$y))) 
    nullp[i] <- ecdf(cons_perms) (null) 
    nullsig[i] <- ifelse(ecdf(cons_perms) (null) +
                           1.96*(ecdf(cons_perms) (null))/sqrt(perms)
                         < alpha, 1, 0) 
  }

  power <- sum(nullsig) / sims 
  
  if (complete_results) {
    powercum <- cumsum(nullsig) / 1:sims  
    estim <- data.frame(powercum, nullp, power, cons_target, null, cases) 
    return(estim)
  } else {
    return(power)
  }
}

get_sim_params <- function(hypo_null, hypo_max, steps = 0.05, null_diff = 0.05) {
  params <- list(set_seed = 111)
  
  cons_params <- rev(seq(from = hypo_null, to = hypo_max, by = steps))
  if( ! hypo_max %in% cons_params) cons_params <- c(hypo_max, cons_params)
  null_params <- cons_params - null_diff
  
  sim_params <- expand.grid(cons_target = cons_params, null = null_params)
  sim_params <- sim_params[sim_params$cons_target > (sim_params$null + 1e-10), ] 
  return(sim_params)
}

cons_values <- function(hypo_null, hypo_max = 1.0, steps = 0.05) {
  cons_params <- seq(0, hypo_max, by = steps)
  cons_params <- cons_params[cons_params > hypo_null | cons_params == hypo_max]
  return(cons_params)
}

qcapar_sim <- qcapar %>%
  rowwise %>%
  do(data.frame(., cons_target = cons_values(.$null)))
qcapar_sim$diff <- abs(qcapar_sim$null-qcapar_sim$cons_target)
qcapar_sim <- filter(qcapar_sim, diff > 0.001)

qcaemp <- qcapar_sim %>%
  rowwise %>%
  mutate(power = qcapower_short(cases, null, cons_target))
```

Loading stored results. Summarizing results of *ex post* power analysis in box plots (figure 8 in manuscript).
```{r qcaempanal, warning = F, message = F}
load("qcaemp.RData")
qcaemp <- filter(qcaemp, cons_target > 0.76) 
qe.box <- ggplot(qcaemp, aes(factor(cons_target), y = power)) 
qe.box + geom_boxplot(width = 0.2) + theme_bw() + 
  labs(x = expression('c'[H1])) +
  theme(legend.position="none") + 
  scale_x_discrete(labels = c("0.8" = "0.8 (n=14)",
                              "0.85" = "0.85 (n=36)",
                              "0.9" = "0.9 (n=45)",
                              "0.95" = "0.95 (n=59)",
                              "1" = "1 (n=63)")) + 
  ggtitle("Figure 8: Ex post power estimates for 63 truth table analyses")
```

Function for calculating quartiles for each value of c(H1) (tabular presentation of information in figure 8).
```{r qcaemp_quart}
qcaquant <- function(x){
  temp <- filter(qcaemp, cons_target > x-0.01 & cons_target < x+0.01)
  quantile(temp$power)
}

qcaemp_quart <- as.data.frame(sapply(seq(1, 0.8, by = -0.05), qcaquant))
qcaemp_quart <- rename(qcaemp_quart, "c(H1)=1" = V1, "c(H1)=0.95" = V2, 
                       "c(H1)=0.9" = V3, "c(H1)=0.85" = V4, "c(H1)=0.8" = V5)
print(qcaemp_quart)
```

```{r qcaemp1, echo = F}
qcaemp1 <- filter(qcaemp, cons_target > 1-0.001)
```

For c(H1)=1, the number of studies with power of less than 0.8 is `r length(which(qcaemp1$power < 0.8))`.  
For c(H1)=1, the power estimate of the first quartile is `r round(quantile(qcaemp1$power, probs = 0.25), digits = 2)`.  
For c(H1)=1, the number of studies in the first quartile is `r length(which(qcaemp1$power <= round(quantile(qcaemp1$power, probs = 0.25), digits = 2)))`.

```{r qcaemp095, echo = F}
qcaemp095 <- filter(qcaemp, cons_target > 0.95-0.001 &
                      cons_target < 0.95+0.001)
```

For c(H1)=0.95, the median power estimate is `r round(quantile(qcaemp095$power, probs = 0.5), digits = 2)`.  
For c(H1)=0.95, the number of studies with power less than the median estimate is `r length(which(qcaemp095$power < round(quantile(qcaemp095$power, probs = 0.5), digits = 2)))`.  
For c(H1)=0.95, the number of studies with power of 0.8 or more is `r length(which(qcaemp095$power >= 0.8))`.  
For c(H1)=0.95, the number of studies in the first quartile is `r length(which(qcaemp095$power <= round(quantile(qcaemp095$power, probs = 0.25), digits = 2)))`.  

```{r qcaemp090, echo = F}
qcaemp090 <- filter(qcaemp, cons_target > 0.90-0.001 &
                      cons_target < 0.90+0.001)
```

For c(H1)=0.9, the number of studies with power of 0.8 or more is `r length(which(qcaemp090$power >= 0.8))`. 

```{r qcaemp085, echo = F}
qcaemp085 <- filter(qcaemp, cons_target > 0.85-0.001 &
                      cons_target < 0.85+0.001)
```

For c(H1)=0.85, the number of studies with power of 0.8 or more is `r length(which(qcaemp085$power >= 0.8))`. 

For c(H1)=0.85, the number of studies with power of 0.5 or less is `r length(which(qcaemp085$power < 0.5))`. 

### Simulating the required number of cases
The function `qcapower_cases()` is a modification of the `qcapower()` function that only outputs the power value as the target parameter in this section.
```{r qcapower_cases}
qcapower_cases <- function(cases, null_hypo, alt_hypo, sims = 1000, perms = 10000,
                     alpha = 0.05, cons_threshold = 0.01, set_seed = 135) {
  null_sig <- vector("numeric", length = sims)
  null_p <- vector("numeric", length = sims)
  quant <- vector("numeric", length = sims)

  set.seed(set_seed)
  calc_consistency <- function(a, y) sum(pmin(a, y)) / sum(a)
  random_draw <- function() round(runif(cases), digits = 2)
  df <- data.frame(index=1:cases)

  for(i in 1:sims) {
    df$a <- random_draw()  
    df$y <- random_draw()
    cons <- calc_consistency(df$a, df$y)
    while(abs(alt_hypo - cons) > cons_threshold) {
      if(cons < alt_hypo) {
        sample_row <- sample(df[df$a > df$y, 'index'], 1)
        df[sample_row, 'y'] <- runif(1, min = df[sample_row, 'y'], max = 1)
      } else if(cons > alt_hypo) {
        sample_row <- sample(df[ , 'index'], 1)
        df[sample_row, 'y'] <- runif(1, min = 0, max = df[sample_row, 'y'])
      }
      cons <- calc_consistency(df$a, df$y)
    }
    cons_perms <- sapply(1:perms, function(x) calc_consistency(df$a, sample(df$y))) 
    quant[i] <- quantile(cons_perms, 0.05)
    null_p[i] <- ecdf(cons_perms)(null_hypo) # p-value of null-hypo
    null_sig[i] <- ifelse(null_p[i] + 1.96 * null_p[i] / sqrt(perms) < alpha, 1, 0) 
  }
  power <- sum(null_sig) / sims  
  powercum <- cumsum(null_sig) / 1:sims  
  estim <- data.frame(power, alt_hypo, null_hypo, cases)

  return(estim)
}
```

The power calculations in the following chunk approach the target power levels of 0.7, 0.8 and 0.9 for prespecified numbers of cases, which then, in turn, equals the required number for achieving the target level of power (table 1 in the manuscript).
```{r required cases, eval = F}
h1_08_p70 <- qcapower_cases(10, null_hypo = 0.8, alt_hypo = 1, 
                            sims = 1000, perms = 10000, alpha = 0.05, 
                            cons_threshold = 0.01, set_seed = 321)$power
h1_08_p80 <- qcapower_cases(20, null_hypo = 0.8, alt_hypo = 1, 
                            sims = 1000, perms = 10000, alpha = 0.05, 
                            cons_threshold = 0.01, set_seed = 321)$power
h1_08_p90 <- qcapower_cases(40, null_hypo = 0.8, alt_hypo = 1, 
                            sims = 1000, perms = 10000, alpha = 0.05, 
                            cons_threshold = 0.01, set_seed = 321)$power

h095_08_p70 <- qcapower_cases(43, null_hypo = 0.8, alt_hypo = 0.95, 
                              sims = 1000, perms = 10000, alpha = 0.05, 
                              cons_threshold = 0.01, set_seed = 321)$power
h095_08_p80 <- qcapower_cases(57, null_hypo = 0.8, alt_hypo = 0.95, 
                              sims = 1000, perms = 10000, alpha = 0.05, 
                              cons_threshold = 0.01, set_seed = 321)$power
h095_08_p90 <- qcapower_cases(80, null_hypo = 0.8, alt_hypo = 0.95, 
                              sims = 1000, perms = 10000, alpha = 0.05, 
                              cons_threshold = 0.01, set_seed = 321)$power

h09_075_p70 <- qcapower_cases(25, null_hypo = 0.75, alt_hypo = 0.9, 
                              sims = 1000, perms = 10000, alpha = 0.05, 
                              cons_threshold = 0.01, set_seed = 321)$power
h09_075_p80 <- qcapower_cases(35, null_hypo = 0.75, alt_hypo = 0.9, 
                              sims = 1000, perms = 10000, alpha = 0.05, 
                              cons_threshold = 0.01, set_seed = 321)$power
h09_075_p90 <- qcapower_cases(50, null_hypo = 0.75, alt_hypo = 0.9, 
                              sims = 1000, perms = 10000, alpha = 0.05, 
                              cons_threshold = 0.01, set_seed = 321)$power

h085_075_p70 <- qcapower_cases(90, null_hypo = 0.75, alt_hypo = 0.85, 
                               sims = 1000, perms = 10000, alpha = 0.05, 
                               cons_threshold = 0.01, set_seed = 321)$power
h085_075_p80 <- qcapower_cases(125, null_hypo = 0.75, alt_hypo = 0.85, 
                               sims = 1000, perms = 10000, alpha = 0.05, 
                               cons_threshold = 0.01, set_seed = 321)$power
h085_075_p90 <- qcapower_cases(180, null_hypo = 0.75, alt_hypo = 0.85, 
                               sims = 1000, perms = 10000, alpha = 0.05, 
                               cons_threshold = 0.01, set_seed = 321)$power

h08_075_p70 <- qcapower_cases(4600, null_hypo = 0.75, alt_hypo = 0.8, 
                              sims = 1000, perms = 10000, alpha = 0.05, 
                              cons_threshold = 0.01, set_seed = 111)$power
h08_075_p80 <- qcapower_cases(6500, null_hypo = 0.75, alt_hypo = 0.8, 
                              sims = 1000, perms = 10000, alpha = 0.05, 
                              cons_threshold = 0.01, set_seed = 111)$power
h08_075_p90 <- qcapower_cases(9000, null_hypo = 0.75, alt_hypo = 0.8, 
                              sims = 1000, perms = 10000, alpha = 0.05, 
                              cons_threshold = 0.01, set_seed = 111)$power
```

Loading stored results.
```{r needed_n}
load("neededn.RData")
```
The following list presents the required number of cases (after the colon) and the estimated level of power (in brackets at the end) for the given values of c(H1), c(H0) and the target power level:  

* c(H1)=1, c(H0)=0.8:  
    + target power=0.7: 10 (`r h1_08_p70`)  
    + target power=0.8: 20 (`r h1_08_p80`)  
    + target power=0.9: 40 (`r h1_08_p90`)  
* c(H1)=0.95, c(H0)=0.8:
    + target power=0.7: 43 (`r h095_08_p70`)
    + target power=0.8: 57 (`r h095_08_p80`)
    + target power=0.9: 80 (`r h095_08_p90`)
* c(H1)=0.9, c(H0)=0.75:
    + target power=0.7: 25 (`r h09_075_p70`)
    + target power=0.8: 35 (`r h09_075_p80`)
    + target power=0.9: 50 (`r h09_075_p90`)
* c(H1)=0.85, c(H0)=0.75:
    + target power=0.7: 90 (`r h085_075_p70`)
    + target power=0.8: 125 (`r h085_075_p80`)
    + target power=0.9: 180 (`r h085_075_p90`)
* c(H1)=0.8, c(H0)=0.75:
    + target power=0.7: 4600 (`r h08_075_p70`)
    + target power=0.8: 6500 (`r h08_075_p80`)
    + target power=0.9: 9000 (`r h08_075_p90`)


### Appendix
As a test for the potential dependence of the simulation results on the chosen `set.seed` number, the `setfunct()` functions runs ten simulations with identical parameters except for the `set.seed` parameter. The `set.seed` numbers run from 1 to 91 in steps of 10.
```{r set_depend function, eval = F}
setfunct <- function(sf){
  qcapower(cases = 10, null_hypo = 0.8, alt_hypo = 1, 
           sims = 1000, set_seed = sf, perms = 10000)$power
}

set_depend <- sapply(seq(1, 91, by = 10), setfunct)
```

Loading stored results and creating point chart with power estimates against `set.seed` numbers (figure A.1 in appendix).
```{r set_depend analysis}
load("set_depend.RData")
set_depend <- as.data.frame(set_depend) %>% 
  summarise_each(funs(mean)) %>% 
  rename("1" = V1, "11" = V2, "21" = V3, "31" = V4, "41" = V5,
         "51" = V6, "61" = V7, "71" = V8, "81" = V9, "91" = V10) %>% 
  gather("set_id", "power", 1:10)
ggplot(set_depend, aes(set_id, power)) + geom_point() +
  theme_bw() + ylim(c(0, 1)) + labs(x = "set.seed() number") +
  ggtitle("Figure A.1: Test for sensitivity of power estimate to set.seed() number")
```

Calculating power estimates for n=10, n=100 and n=1000 to explore in more detail how the estimates depend on the values of C(H1), c(H0) and n.
```{r inversed power, eval = F}
h1_cases <- data.frame(matrix(nrow = 10000, ncol = 0))
h1_cases <- h1_cases %>% 
  mutate("n=10" = qcapower_exp(cases = 10, null_hypo = 0.95, 
                            alt_hypo = 1, sims = 1, set_seed = 354,
                                         perms = 10000)$cons_perms) %>% 
  mutate("n=100" = qcapower_exp(cases = 100, null_hypo = 0.95, 
                            alt_hypo = 1, sims = 1, set_seed = 354,
                                         perms = 10000)$cons_perms) %>% 
  mutate("n=1000" = qcapower_exp(cases = 1000, null_hypo = 0.95, 
                            alt_hypo = 1, sims = 1, set_seed = 354,
                                         perms = 10000)$cons_perms) %>% 
  mutate("H1" = 1) %>% 
  gather("cases", "consperms", 1:3)

h09_cases <- data.frame(matrix(nrow = 10000, ncol = 0))
h09_cases <- h09_cases %>% 
  mutate("n=10" = qcapower_exp(cases = 10, null_hypo = 0.75, 
                            alt_hypo = 0.9, sims = 1, set_seed = 354,
                                         perms = 10000)$cons_perms) %>% 
  mutate("n=100" = qcapower_exp(cases = 100, null_hypo = 0.75, 
                            alt_hypo = 0.9, sims = 1, set_seed = 354,
                                         perms = 10000)$cons_perms) %>% 
  mutate("n=1000" = qcapower_exp(cases = 1000, null_hypo = 0.75, 
                            alt_hypo = 0.9, sims = 1, set_seed = 354,
                                         perms = 10000)$cons_perms) %>% 
  mutate("H1" = 0.9) %>% 
  gather("cases", "consperms", 1:3)

h08_cases <- data.frame(matrix(nrow = 10000, ncol = 0))
h08_cases <- h08_cases %>% 
  mutate("n=10" = qcapower_exp(cases = 10, null_hypo = 0.75, 
                            alt_hypo = 0.8, sims = 1, set_seed = 354,
                                         perms = 10000)$cons_perms) %>% 
  mutate("n=100" = qcapower_exp(cases = 100, null_hypo = 0.75, 
                            alt_hypo = 0.8, sims = 1, set_seed = 354,
                                         perms = 10000)$cons_perms) %>% 
  mutate("n=1000" = qcapower_exp(cases = 1000, null_hypo = 0.75, 
                            alt_hypo = 0.8, sims = 1, set_seed = 354,
                                         perms = 10000)$cons_perms) %>% 
  mutate("H1" = 0.8) %>% 
  gather("cases", "consperms", 1:3)
inv_power <- bind_rows(h1_cases, h09_cases, h08_cases) 
```

Loading stored results and generating density plots (figure A.2 in appendix).
```{r inversed power plot, fig.heigt=8}
load("inv_power.RData")
inv_power <- inv_power %>% 
  mutate(h_order = factor(H1, ordered = T)) %>% 
  mutate(h_order = factor(h_order, levels = rev(levels(h_order))))
ggplot(inv_power, aes(x = consperms)) + theme_bw() +
  geom_density(aes(color = cases)) +
  theme(legend.title = element_blank()) +
  xlab("consistency value of permutation") +
  facet_wrap(~h_order, ncol = 1) +
  ggtitle(expression('Figure A.2: Permutations for three values of c'[H1]))
```

Creating one exact permuted distribution for one simulation to compare exact permutation with randomized permutation.
```{r exact permutation, eval = F}
data_simul <- function(cases, null_hypo, alt_hypo, sims = 1, alpha = 0.05, 
                       cons_threshold = 0.01, set_seed = 12) {
  
  set.seed(set_seed)
  calc_consistency <- function(a, y) sum(pmin(a, y)) / sum(a)
  random_draw <- function() round(runif(cases), digits = 2)
  df <- data.frame(index=1:cases)

  for(i in 1:sims) {
    df$a <- random_draw()  
    df$y <- random_draw()
    cons <- calc_consistency(df$a, df$y)
    while(abs(alt_hypo - cons) > cons_threshold) {
      if(cons < alt_hypo) {
        sample_row <- sample(df[df$a > df$y, 'index'], 1)
        df[sample_row, 'y'] <- runif(1, min = df[sample_row, 'y'], max = 1)
      } else if(cons > alt_hypo) {
        sample_row <- sample(df[ , 'index'], 1)
        df[sample_row, 'y'] <- runif(1, min = 0, max = df[sample_row, 'y'])
      }
      cons <- calc_consistency(df$a, df$y)
    }
  }
  estim <- df

  return(estim)
}

df_1_095 <- data_simul(cases = 10, null_hypo = 0.95, alt_hypo = 1)

y_perm <- unlist(permn(df_1_095$y))
a_perm <- rep(df_1_095$a, length(y_perm)/nrow(df_1_095))
data_id <- sort(rep(seq(length(y_perm)/nrow(df_1_095)), 10))
exact_perm <- data.frame(a_perm, y_perm, data_id)
exact_perm <- exact_perm %>% # Could be merged with previous line
  group_by(data_id) %>% 
  mutate(consist_id = sum(pmin(a_perm, y_perm))/sum(a_perm)) %>% 
  summarise(consist = mean(consist_id)) %>% 
  mutate(permid = "exact") %>% 
  ungroup(data_id)
```

Loading stored results and creating density plots (figure A.3 in appendix).
```{r exact permutation cons}
load("exact_perm.RData")
exact_perm_s1 <- slice(exact_perm, 1:10000) %>% 
  mutate(permid = "slice 1")
exact_perm_s2 <- slice(exact_perm, 10001:20000) %>% 
  mutate(permid = "slice 2")
exact_perm_s3 <- slice(exact_perm, 100001:110000) %>% 
  mutate(permid = "slice 3")
perm_expand <- rbind(exact_perm, exact_perm_s1,
                     exact_perm_s2, exact_perm_s3)
ggplot(perm_expand, aes(consist, color = factor(permid))) + 
  geom_density() + theme_bw() + 
  xlab("consistency value of permutation") +
  theme(legend.title = element_blank()) + 
  ggtitle("Figure A.3: Exact distribution of consistency values for ten cases")
```
 
The 5%-quantiles for the exact distribution and three subsets are nearly identical with `r round(quantile(exact_perm$consist, 0.05), digits = 2)` for the exact distribution and `r round(quantile(exact_perm_s1$consist, 0.05), digits = 2)`, `r round(quantile(exact_perm_s2$consist, 0.05), digits = 2)` and `r round(quantile(exact_perm_s3$consist, 0.05), digits = 2)` for the three arbitrarily chosen slices each covering 10000 permutations.
